{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PyGrunn - `vaex` - Machine Learning example: the \"server\" side"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-13T09:46:39.647754Z",
     "start_time": "2019-05-13T09:46:38.296195Z"
    }
   },
   "outputs": [],
   "source": [
    "import vaex\n",
    "import vaex.ml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read in the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-13T09:46:39.726360Z",
     "start_time": "2019-05-13T09:46:39.649378Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This test set has 1000000 samples.\n"
     ]
    }
   ],
   "source": [
    "test = vaex.open('./data/test_set.arrow')\n",
    "print('This test set has %i samples.' % len(test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Inspect the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-13T09:46:39.765227Z",
     "start_time": "2019-05-13T09:46:39.728403Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th>#                                  </th><th>vendor_id  </th><th>pickup_datetime              </th><th>dropoff_datetime             </th><th>passenger_count  </th><th>trip_distance  </th><th>pickup_longitude  </th><th>pickup_latitude  </th><th>rate_code  </th><th>store_and_fwd_flag  </th><th>dropoff_longitude  </th><th>dropoff_latitude  </th><th>payment_type  </th><th>fare_amount  </th><th>surcharge  </th><th>mta_tax  </th><th>tip_amount  </th><th>tolls_amount  </th><th>total_amount  </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td><i style='opacity: 0.6'>0</i>      </td><td>CMT        </td><td>2013-01-01 15:11:48.000000000</td><td>2013-01-01 15:18:10.000000000</td><td>4                </td><td>1.0            </td><td>-73.978165        </td><td>40.757977        </td><td>1.0        </td><td>0.0                 </td><td>-73.98984          </td><td>40.751173         </td><td>CSH           </td><td>6.5          </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>7.0           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>1</i>      </td><td>CMT        </td><td>2013-01-06 00:18:35.000000000</td><td>2013-01-06 00:22:54.000000000</td><td>1                </td><td>1.5            </td><td>-74.00668         </td><td>40.731781        </td><td>1.0        </td><td>0.0                 </td><td>-73.994499         </td><td>40.750659         </td><td>CSH           </td><td>6.0          </td><td>0.5        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>7.0           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>2</i>      </td><td>CMT        </td><td>2013-01-05 18:49:41.000000000</td><td>2013-01-05 18:54:23.000000000</td><td>1                </td><td>1.1            </td><td>-74.004711        </td><td>40.73777         </td><td>1.0        </td><td>0.0                 </td><td>-74.009831         </td><td>40.726            </td><td>CSH           </td><td>5.5          </td><td>1.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>7.0           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>3</i>      </td><td>CMT        </td><td>2013-01-07 23:54:15.000000000</td><td>2013-01-07 23:58:20.000000000</td><td>2                </td><td>0.7            </td><td>-73.97459999999998</td><td>40.759945        </td><td>1.0        </td><td>0.0                 </td><td>-73.98473699999998 </td><td>40.759388         </td><td>CSH           </td><td>5.0          </td><td>0.5        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>6.0           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>4</i>      </td><td>CMT        </td><td>2013-01-07 23:25:03.000000000</td><td>2013-01-07 23:34:24.000000000</td><td>1                </td><td>2.1            </td><td>-73.976252        </td><td>40.748528        </td><td>1.0        </td><td>0.0                 </td><td>-74.002583         </td><td>40.747867         </td><td>CSH           </td><td>9.5          </td><td>0.5        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>10.5          </td></tr>\n",
       "<tr><td>...                                </td><td>...        </td><td>...                          </td><td>...                          </td><td>...              </td><td>...            </td><td>...               </td><td>...              </td><td>...        </td><td>...                 </td><td>...                </td><td>...               </td><td>...           </td><td>...          </td><td>...        </td><td>...      </td><td>...         </td><td>...           </td><td>...           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>999,995</i></td><td>VTS        </td><td>2013-01-01 12:34:00.000000000</td><td>2013-01-01 12:39:00.000000000</td><td>6                </td><td>1.17           </td><td>-73.974007        </td><td>40.764175        </td><td>1.0        </td><td>nan                 </td><td>-73.96455          </td><td>40.773002         </td><td>CRD           </td><td>5.5          </td><td>0.0        </td><td>0.5      </td><td>1.1         </td><td>0.0           </td><td>7.1           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>999,996</i></td><td>VTS        </td><td>2013-01-01 12:01:00.000000000</td><td>2013-01-01 12:36:00.000000000</td><td>2                </td><td>12.37          </td><td>-73.87305999999998</td><td>40.774145        </td><td>1.0        </td><td>nan                 </td><td>-73.98869          </td><td>40.747712         </td><td>CSH           </td><td>40.0         </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>4.8           </td><td>45.3          </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>999,997</i></td><td>VTS        </td><td>2013-01-01 12:28:00.000000000</td><td>2013-01-01 12:38:00.000000000</td><td>4                </td><td>3.02           </td><td>-73.98922299999998</td><td>40.753687        </td><td>1.0        </td><td>nan                 </td><td>-74.010683         </td><td>40.713553         </td><td>CSH           </td><td>11.0         </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>11.5          </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>999,998</i></td><td>VTS        </td><td>2013-01-01 12:36:00.000000000</td><td>2013-01-01 12:36:00.000000000</td><td>1                </td><td>0.0            </td><td>0.0               </td><td>0.0              </td><td>1.0        </td><td>nan                 </td><td>0.0                </td><td>0.0               </td><td>CSH           </td><td>2.5          </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>3.0           </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>999,999</i></td><td>VTS        </td><td>2013-01-01 12:35:00.000000000</td><td>2013-01-01 12:42:00.000000000</td><td>1                </td><td>1.54           </td><td>-73.969925        </td><td>40.793903        </td><td>1.0        </td><td>nan                 </td><td>-73.945052         </td><td>40.783328         </td><td>CSH           </td><td>7.5          </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>8.0           </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "#        vendor_id    pickup_datetime                dropoff_datetime               passenger_count    trip_distance    pickup_longitude    pickup_latitude    rate_code    store_and_fwd_flag    dropoff_longitude    dropoff_latitude    payment_type    fare_amount    surcharge    mta_tax    tip_amount    tolls_amount    total_amount\n",
       "0        CMT          2013-01-01 15:11:48.000000000  2013-01-01 15:18:10.000000000  4                  1.0              -73.978165          40.757977          1.0          0.0                   -73.98984            40.751173           CSH             6.5            0.0          0.5        0.0           0.0             7.0\n",
       "1        CMT          2013-01-06 00:18:35.000000000  2013-01-06 00:22:54.000000000  1                  1.5              -74.00668           40.731781          1.0          0.0                   -73.994499           40.750659           CSH             6.0            0.5          0.5        0.0           0.0             7.0\n",
       "2        CMT          2013-01-05 18:49:41.000000000  2013-01-05 18:54:23.000000000  1                  1.1              -74.004711          40.73777           1.0          0.0                   -74.009831           40.726              CSH             5.5            1.0          0.5        0.0           0.0             7.0\n",
       "3        CMT          2013-01-07 23:54:15.000000000  2013-01-07 23:58:20.000000000  2                  0.7              -73.97459999999998  40.759945          1.0          0.0                   -73.98473699999998   40.759388           CSH             5.0            0.5          0.5        0.0           0.0             6.0\n",
       "4        CMT          2013-01-07 23:25:03.000000000  2013-01-07 23:34:24.000000000  1                  2.1              -73.976252          40.748528          1.0          0.0                   -74.002583           40.747867           CSH             9.5            0.5          0.5        0.0           0.0             10.5\n",
       "...      ...          ...                            ...                            ...                ...              ...                 ...                ...          ...                   ...                  ...                 ...             ...            ...          ...        ...           ...             ...\n",
       "999,995  VTS          2013-01-01 12:34:00.000000000  2013-01-01 12:39:00.000000000  6                  1.17             -73.974007          40.764175          1.0          nan                   -73.96455            40.773002           CRD             5.5            0.0          0.5        1.1           0.0             7.1\n",
       "999,996  VTS          2013-01-01 12:01:00.000000000  2013-01-01 12:36:00.000000000  2                  12.37            -73.87305999999998  40.774145          1.0          nan                   -73.98869            40.747712           CSH             40.0           0.0          0.5        0.0           4.8             45.3\n",
       "999,997  VTS          2013-01-01 12:28:00.000000000  2013-01-01 12:38:00.000000000  4                  3.02             -73.98922299999998  40.753687          1.0          nan                   -74.010683           40.713553           CSH             11.0           0.0          0.5        0.0           0.0             11.5\n",
       "999,998  VTS          2013-01-01 12:36:00.000000000  2013-01-01 12:36:00.000000000  1                  0.0              0.0                 0.0                1.0          nan                   0.0                  0.0                 CSH             2.5            0.0          0.5        0.0           0.0             3.0\n",
       "999,999  VTS          2013-01-01 12:35:00.000000000  2013-01-01 12:42:00.000000000  1                  1.54             -73.969925          40.793903          1.0          nan                   -73.945052           40.783328           CSH             7.5            0.0          0.5        0.0           0.0             8.0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the state into the test DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-13T09:46:40.648504Z",
     "start_time": "2019-05-13T09:46:39.767074Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/jovan/miniconda3/lib/python3.7/site-packages/lightgbm/__init__.py:46: UserWarning: Starting from version 2.2.1, the library file in distribution wheels for macOS is built by the Apple Clang (Xcode_8.3.1) compiler.\n",
      "This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.\n",
      "Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.\n",
      "You can install the OpenMP library by the following command: ``brew install libomp``.\n",
      "  \"You can install the OpenMP library by the following command: ``brew install libomp``.\", UserWarning)\n"
     ]
    }
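   ],
   "source": [
    "# This is where the magic happens: loading the saved pipeline state adds the\n",
    "# engineered features and the model predictions to `test` as extra columns\n",
    "# (the state's row filter is applied too, so the row count can change).\n",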
    "test.state_load('./data/vaex_model_pipeline.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-13T09:46:40.979474Z",
     "start_time": "2019-05-13T09:46:40.650409Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th>#                                  </th><th>vendor_id  </th><th>pickup_datetime              </th><th>dropoff_datetime             </th><th>passenger_count  </th><th>trip_distance  </th><th>pickup_longitude  </th><th>pickup_latitude  </th><th>rate_code  </th><th>store_and_fwd_flag  </th><th>dropoff_longitude  </th><th>dropoff_latitude  </th><th>payment_type  </th><th>fare_amount  </th><th>surcharge  </th><th>mta_tax  </th><th>tip_amount  </th><th>tolls_amount  </th><th>total_amount  </th><th>trip_duration_min  </th><th>pu_hour  </th><th>pu_day_of_week  </th><th>pu_is_weekend  </th><th>arc_distance_miles  </th><th>direction_angle    </th><th>PCA_0                 </th><th>PCA_1                 </th><th>PCA_2                </th><th>PCA_3                 </th><th>label_encoded_vendor_id  </th><th>lightgbm_prediction  </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td><i style='opacity: 0.6'>0</i>      </td><td>CMT        </td><td>2013-01-01 15:11:48.000000000</td><td>2013-01-01 15:18:10.000000000</td><td>4                </td><td>1.0            </td><td>-73.978165        </td><td>40.757977        </td><td>1.0        </td><td>0.0                 </td><td>-73.98984          </td><td>40.751173         </td><td>CSH           </td><td>6.5          </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>7.0           </td><td>6.366666666666666  </td><td>15       </td><td>1               </td><td>0              </td><td>0.8170350797859289  </td><td>-120.23293627513229</td><td>0.005563989582974179  </td><td>0.0034554530129773824 </td><td>-0.007398977515577462</td><td>0.009276893157306015  </td><td>0                        </td><td>7.364544196271947    </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>1</i>      </td><td>CMT        </td><td>2013-01-06 00:18:35.000000000</td><td>2013-01-06 00:22:54.000000000</td><td>1                </td><td>1.5            </td><td>-74.00668         </td><td>40.731781        </td><td>1.0        </td><td>0.0                 </td><td>-73.994499         </td><td>40.750659         </td><td>CSH           </td><td>6.0          </td><td>0.5        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>7.0           </td><td>4.316666666666666  </td><td>0        </td><td>6               </td><td>1              </td><td>0.915206089778209   </td><td>32.832067311173155 </td><td>-0.033099698565622254 </td><td>0.0055661068803717975 </td><td>-0.010575714579848391</td><td>0.012723459365073513  </td><td>0                        </td><td>6.422300378809424    </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>2</i>      </td><td>CMT        </td><td>2013-01-05 18:49:41.000000000</td><td>2013-01-05 18:54:23.000000000</td><td>1                </td><td>1.1            </td><td>-74.004711        </td><td>40.73777         </td><td>1.0        </td><td>0.0                 </td><td>-74.009831         </td><td>40.726            </td><td>CSH           </td><td>5.5          </td><td>1.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>7.0           </td><td>4.7                </td><td>18       </td><td>5               </td><td>1              </td><td>0.4187481463954876  </td><td>-156.4907551070662 </td><td>-0.027438352569111583 </td><td>0.008339956823645151  </td><td>-0.039523027790593815</td><td>0.010445398154522724  </td><td>0                        </td><td>6.431858147806219    </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>3</i>      </td><td>CMT        </td><td>2013-01-07 23:54:15.000000000</td><td>2013-01-07 23:58:20.000000000</td><td>2                </td><td>0.7            </td><td>-73.97459999999998</td><td>40.759945        </td><td>1.0        </td><td>0.0                 </td><td>-73.98473699999998 </td><td>40.759388         </td><td>CSH           </td><td>5.0          </td><td>0.5        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>6.0           </td><td>4.083333333333333  </td><td>23       </td><td>0               </td><td>0              </td><td>0.7004873614138178  </td><td>-93.14508131372725 </td><td>0.009462373222828913  </td><td>0.0022786548158773585 </td><td>0.0022418228681065477</td><td>0.01003965259655025   </td><td>0                        </td><td>5.840539501229937    </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>4</i>      </td><td>CMT        </td><td>2013-01-07 23:25:03.000000000</td><td>2013-01-07 23:34:24.000000000</td><td>1                </td><td>2.1            </td><td>-73.976252        </td><td>40.748528        </td><td>1.0        </td><td>0.0                 </td><td>-74.002583         </td><td>40.747867         </td><td>CSH           </td><td>9.5          </td><td>0.5        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>10.5          </td><td>9.35               </td><td>23       </td><td>0               </td><td>0              </td><td>1.819360209413874   </td><td>-91.43802201585738 </td><td>0.00013781489544046222</td><td>-0.004513223053993152 </td><td>-0.017617758869502797</td><td>0.01757690476215882   </td><td>0                        </td><td>11.853951409124315   </td></tr>\n",
       "<tr><td>...                                </td><td>...        </td><td>...                          </td><td>...                          </td><td>...              </td><td>...            </td><td>...               </td><td>...              </td><td>...        </td><td>...                 </td><td>...                </td><td>...               </td><td>...           </td><td>...          </td><td>...        </td><td>...      </td><td>...         </td><td>...           </td><td>...           </td><td>...                </td><td>...      </td><td>...             </td><td>...            </td><td>...                 </td><td>...                </td><td>...                   </td><td>...                   </td><td>...                  </td><td>...                   </td><td>...                      </td><td>...                  </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>745,211</i></td><td>VTS        </td><td>2013-01-01 11:44:00.000000000</td><td>2013-01-01 11:48:00.000000000</td><td>2                </td><td>1.01           </td><td>-73.977867        </td><td>40.752242        </td><td>1.0        </td><td>nan                 </td><td>-73.984707         </td><td>40.74635          </td><td>CSH           </td><td>5.5          </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>6.0           </td><td>4.0                </td><td>11       </td><td>1               </td><td>0              </td><td>0.4857721080797788  </td><td>-130.74172519596877</td><td>0.0016677981982832183 </td><td>-0.0007634001124341561</td><td>-0.008238445770749805</td><td>0.0022837314225409282 </td><td>1                        </td><td>6.7908121204710366   </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>745,212</i></td><td>VTS        </td><td>2013-01-01 12:30:00.000000000</td><td>2013-01-01 12:35:00.000000000</td><td>3                </td><td>1.43           </td><td>-73.97408799999998</td><td>40.755217        </td><td>1.0        </td><td>nan                 </td><td>-73.985583         </td><td>40.74194          </td><td>CRD           </td><td>6.5          </td><td>0.0        </td><td>0.5      </td><td>1.3         </td><td>0.0           </td><td>8.3           </td><td>5.0                </td><td>12       </td><td>1               </td><td>0              </td><td>0.8336106186269362  </td><td>-139.11454490848595</td><td>0.0064363219895815765 </td><td>-0.0013900155064279053</td><td>-0.012308814501983484</td><td>0.00037386902126578404</td><td>1                        </td><td>8.219501134401456    </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>745,213</i></td><td>VTS        </td><td>2013-01-01 12:27:00.000000000</td><td>2013-01-01 12:38:00.000000000</td><td>1                </td><td>3.22           </td><td>-73.98677999999998</td><td>40.74939         </td><td>1.0        </td><td>nan                 </td><td>-74.000422         </td><td>40.723847         </td><td>CSH           </td><td>12.0         </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>12.5          </td><td>11.0               </td><td>12       </td><td>1               </td><td>0              </td><td>1.0607977719708614  </td><td>-151.89422599616444</td><td>-0.006598518727099495 </td><td>0.0036233387091474216 </td><td>-0.03567689647266646 </td><td>0.0015926040258109411 </td><td>1                        </td><td>15.97530794255817    </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>745,214</i></td><td>VTS        </td><td>2013-01-01 12:28:00.000000000</td><td>2013-01-01 12:38:00.000000000</td><td>4                </td><td>3.02           </td><td>-73.98922299999998</td><td>40.753687        </td><td>1.0        </td><td>nan                 </td><td>-74.010683         </td><td>40.713553         </td><td>CSH           </td><td>11.0         </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>11.5          </td><td>10.0               </td><td>12       </td><td>1               </td><td>0              </td><td>1.668174698555257   </td><td>-151.86619536928146</td><td>-0.005229616762420809 </td><td>0.008372924523837585  </td><td>-0.05005047116436957 </td><td>0.0037501275093600565 </td><td>1                        </td><td>14.871115191447839   </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>745,215</i></td><td>VTS        </td><td>2013-01-01 12:35:00.000000000</td><td>2013-01-01 12:42:00.000000000</td><td>1                </td><td>1.54           </td><td>-73.969925        </td><td>40.793903        </td><td>1.0        </td><td>nan                 </td><td>-73.945052         </td><td>40.783328         </td><td>CSH           </td><td>7.5          </td><td>0.0        </td><td>0.5      </td><td>0.0         </td><td>0.0           </td><td>8.0           </td><td>7.0                </td><td>12       </td><td>1               </td><td>0              </td><td>1.7303989127304344  </td><td>113.03324723766758 </td><td>0.03703009568845854   </td><td>0.02265074521077055   </td><td>0.045051966305525375 </td><td>-0.007717442995512018 </td><td>1                        </td><td>8.631783515579428    </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "#        vendor_id    pickup_datetime                dropoff_datetime               passenger_count    trip_distance    pickup_longitude    pickup_latitude    rate_code    store_and_fwd_flag    dropoff_longitude    dropoff_latitude    payment_type    fare_amount    surcharge    mta_tax    tip_amount    tolls_amount    total_amount    trip_duration_min    pu_hour    pu_day_of_week    pu_is_weekend    arc_distance_miles    direction_angle      PCA_0                   PCA_1                   PCA_2                  PCA_3                   label_encoded_vendor_id    lightgbm_prediction\n",
       "0        CMT          2013-01-01 15:11:48.000000000  2013-01-01 15:18:10.000000000  4                  1.0              -73.978165          40.757977          1.0          0.0                   -73.98984            40.751173           CSH             6.5            0.0          0.5        0.0           0.0             7.0             6.366666666666666    15         1                 0                0.8170350797859289    -120.23293627513229  0.005563989582974179    0.0034554530129773824   -0.007398977515577462  0.009276893157306015    0                          7.364544196271947\n",
       "1        CMT          2013-01-06 00:18:35.000000000  2013-01-06 00:22:54.000000000  1                  1.5              -74.00668           40.731781          1.0          0.0                   -73.994499           40.750659           CSH             6.0            0.5          0.5        0.0           0.0             7.0             4.316666666666666    0          6                 1                0.915206089778209     32.832067311173155   -0.033099698565622254   0.0055661068803717975   -0.010575714579848391  0.012723459365073513    0                          6.422300378809424\n",
       "2        CMT          2013-01-05 18:49:41.000000000  2013-01-05 18:54:23.000000000  1                  1.1              -74.004711          40.73777           1.0          0.0                   -74.009831           40.726              CSH             5.5            1.0          0.5        0.0           0.0             7.0             4.7                  18         5                 1                0.4187481463954876    -156.4907551070662   -0.027438352569111583   0.008339956823645151    -0.039523027790593815  0.010445398154522724    0                          6.431858147806219\n",
       "3        CMT          2013-01-07 23:54:15.000000000  2013-01-07 23:58:20.000000000  2                  0.7              -73.97459999999998  40.759945          1.0          0.0                   -73.98473699999998   40.759388           CSH             5.0            0.5          0.5        0.0           0.0             6.0             4.083333333333333    23         0                 0                0.7004873614138178    -93.14508131372725   0.009462373222828913    0.0022786548158773585   0.0022418228681065477  0.01003965259655025     0                          5.840539501229937\n",
       "4        CMT          2013-01-07 23:25:03.000000000  2013-01-07 23:34:24.000000000  1                  2.1              -73.976252          40.748528          1.0          0.0                   -74.002583           40.747867           CSH             9.5            0.5          0.5        0.0           0.0             10.5            9.35                 23         0                 0                1.819360209413874     -91.43802201585738   0.00013781489544046222  -0.004513223053993152   -0.017617758869502797  0.01757690476215882     0                          11.853951409124315\n",
       "...      ...          ...                            ...                            ...                ...              ...                 ...                ...          ...                   ...                  ...                 ...             ...            ...          ...        ...           ...             ...             ...                  ...        ...               ...              ...                   ...                  ...                     ...                     ...                    ...                     ...                        ...\n",
       "745,211  VTS          2013-01-01 11:44:00.000000000  2013-01-01 11:48:00.000000000  2                  1.01             -73.977867          40.752242          1.0          nan                   -73.984707           40.74635            CSH             5.5            0.0          0.5        0.0           0.0             6.0             4.0                  11         1                 0                0.4857721080797788    -130.74172519596877  0.0016677981982832183   -0.0007634001124341561  -0.008238445770749805  0.0022837314225409282   1                          6.7908121204710366\n",
       "745,212  VTS          2013-01-01 12:30:00.000000000  2013-01-01 12:35:00.000000000  3                  1.43             -73.97408799999998  40.755217          1.0          nan                   -73.985583           40.74194            CRD             6.5            0.0          0.5        1.3           0.0             8.3             5.0                  12         1                 0                0.8336106186269362    -139.11454490848595  0.0064363219895815765   -0.0013900155064279053  -0.012308814501983484  0.00037386902126578404  1                          8.219501134401456\n",
       "745,213  VTS          2013-01-01 12:27:00.000000000  2013-01-01 12:38:00.000000000  1                  3.22             -73.98677999999998  40.74939           1.0          nan                   -74.000422           40.723847           CSH             12.0           0.0          0.5        0.0           0.0             12.5            11.0                 12         1                 0                1.0607977719708614    -151.89422599616444  -0.006598518727099495   0.0036233387091474216   -0.03567689647266646   0.0015926040258109411   1                          15.97530794255817\n",
       "745,214  VTS          2013-01-01 12:28:00.000000000  2013-01-01 12:38:00.000000000  4                  3.02             -73.98922299999998  40.753687          1.0          nan                   -74.010683           40.713553           CSH             11.0           0.0          0.5        0.0           0.0             11.5            10.0                 12         1                 0                1.668174698555257     -151.86619536928146  -0.005229616762420809   0.008372924523837585    -0.05005047116436957   0.0037501275093600565   1                          14.871115191447839\n",
       "745,215  VTS          2013-01-01 12:35:00.000000000  2013-01-01 12:42:00.000000000  1                  1.54             -73.969925          40.793903          1.0          nan                   -73.945052           40.783328           CSH             7.5            0.0          0.5        0.0           0.0             8.0             7.0                  12         1                 0                1.7303989127304344    113.03324723766758   0.03703009568845854     0.02265074521077055     0.045051966305525375   -0.007717442995512018   1                          8.631783515579428"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# view the transformed test set - it includes the predictions and all intermediate steps!\n",
    "test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-13T09:46:46.029152Z",
     "start_time": "2019-05-13T09:46:40.980947Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The mean absolute error is 2.364\n",
      "The mean squared score is 12.942\n"
     ]
    }
   ],
   "source": [
    "# check the performance\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
    "\n",
    "mae_test_score = mean_absolute_error(test.trip_duration_min.values, test.lightgbm_prediction.values)\n",
    "mse_test_score = mean_squared_error(test.trip_duration_min.values, test.lightgbm_prediction.values)\n",
    "\n",
    "print('The mean absolute error is %2.3f' % mae_test_score)\n",
    "print('The mean squared score is %2.3f' % mse_test_score)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### End of part 4"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
